You can load data from CouchDB or a managed Cloudant instance using the Cloudant Spark connector.
Collect your database connection information: the database host, user name, password and source database.
Import PixieDust and enable the Apache Spark job monitor:
In [7]:
# Import PixieDust for use in this notebook.
import pixiedust
# Turn on the Apache Spark job monitor for this notebook.
pixiedust.enableJobMonitor()
In [8]:
# @hidden_cell
# Cloudant connection settings: replace each '...' placeholder with your own value.
# NOTE(review): anything typed here is saved in the notebook source -- avoid
# committing or sharing the notebook with real credentials filled in.
host = "..."      # Cloudant host name
username = "..."  # Cloudant user name
password = "..."  # Cloudant password
database = "..."  # source database name
In [6]:
# no changes are required to this cell
# Imported in this cell so it does not rely on the kernel having pre-loaded
# SQLContext into the namespace.
from pyspark.sql import SQLContext

# obtain Spark SQL Context
# NOTE(review): `sc` is assumed to be the SparkContext supplied by the
# notebook's Spark kernel -- it is not defined anywhere in this notebook.
sqlContext = SQLContext(sc)

# load data: read the source database through the Cloudant Spark connector,
# using the host, username, password and database values entered earlier.
cloudant_data = (
    sqlContext.read.format("com.cloudant.spark")
    .option("cloudant.host", host)
    .option("cloudant.username", username)
    .option("cloudant.password", password)
    .load(database)
)
In [9]:
# Show the DataFrame loaded from Cloudant.
# NOTE(review): `display` is presumably the function made available by the
# PixieDust import earlier in the notebook -- confirm.
display(cloudant_data)
In [ ]: